% Copyright 2016 Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia
% Corporation, the U.S. Government retains certain rights in this software

% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are
% met:
% 
%     (1) Redistributions of source code must retain the above copyright
%     notice, this list of conditions and the following disclaimer. 
% 
%     (2) Redistributions in binary form must reproduce the above copyright
%     notice, this list of conditions and the following disclaimer in
%     the documentation and/or other materials provided with the
%     distribution.  
%     
%     (3)The name of the author may not be used to
%     endorse or promote products derived from this software without
%     specific prior written permission.
% 
% THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
% IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
% WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
% DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
% INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
% (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
% SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
% HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
% STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
% IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
% POSSIBILITY OF SUCH DAMAGE.

function [deconData,corrVals] = despikeZhang(data,m,sigmaThresholds,referenceSpectra)

%FUNCTION [deconData,corrVals] = despikeZhang(data,m,sigmaThresholds,referenceSpectra)
%
% PURPOSE:
%	despikeZhang.m is designed to remove cosmic ray spikes from
%	spectra. Each spectrum is compared against a robust linear fit built
%	from its most-correlated nearest-neighbor spectrum (plus any supplied
%	reference spectra); runs of strongly positive residuals are replaced
%	with the fitted values. 
%
% DEPENDENCIES:
%	despikeMedian.m
%	robustfit (Statistics and Machine Learning Toolbox)
%
% CLASSES:
%	-None-
%
% INPUTS:
%	data:
%		An nBands by n by p array, where data(:,r,c) is the spectrum of 
%		the pixel at row r, column c of an n x p image. The data is 
%		expected to be contaminated with cosmic spikes. An nBands by n 
%		matrix is accepted and treated as an n x 1 image. 
%	m:
%		A variable specifying the size of the median filter span, which is 
%		set at 2*m+1. It is passed unchanged to despikeMedian.m. 
%	sigmaThresholds:
%		Scaling factors that determine how aggressively spikes are
%		removed, adjustable parameter of the algorithm. This algorithm
%		needs two thresholds, one low and one high. Only the first two
%		elements are used; they may be given in either order. 
%	referenceSpectra:	(optional)
%		The pure spectra (one per column, nBands rows) used to refine the 
%		nearest neighbor match. 
%
% OUTPUTS:
%	deconData:
%		An nBands by (n x p) matrix, where each of the (n x p) columns 
%		corresponds to a spectrum. In the filtered output, the cosmic 
%		spikes in the original data have hopefully been suppressed. 
%	corrVals:
%		An n x p matrix, indicating the correlation coefficient of the
%		most-correlated nearest-neighbor for each pixel. The value is NaN
%		where no correlation could be determined (for example when the
%		median-filtered spectrum is constant, or the pixel has no
%		neighbors). 
%
% REFERENCES:
%   This code was designed as an implementation of the following reference:
%	1)	Zhang, L. and M. J. Henson (2007). "A practical algorithm to 
%		remove cosmic spikes in Raman imaging data for pharmaceutical 
%		applications." Applied Spectroscopy 61(9): 1015-1020.
%
% PROGRAMMER COMMENTS:
%	1)	This code runs best if reference spectra for all spectral 
%		components are provided. Intriguingly, if not all spectral 
%		components can be provided, this code appears to run better if no 
%		components are provided than if only some are. 
%	2)	When operating a line-scan spectral imager, cosmic rays are much
%		more likely to be present at the same wavelength in adjacent
%		spectra measured at the same time. In such cases, the reference
%		recommends only examining nearest neighbors acquired at different
%		times. This code does not implement that process, as it was not
%		relevant for the simulated data. 
%	3)	Only the correlations between each pixel and its (up to) eight
%		neighbors are computed, so memory use grows linearly with the
%		number of pixels rather than quadratically. 
%
% LIMITATIONS:
%	This code uses the nearest-neighbor spectrum that is most highly
%	correlated with each spectrum. Therefore, this code likely performs
%	better if the data is spatially oversampled, and may perform worse if a
%	cosmic ray spike occurs in the same location in two spectra. 

% Revision:
%	Sandia Hyper-UBS 1.0.0

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Determine sizes and add defaults. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%Requesting three outputs from size guarantees that nCols is defined (as 1)
%even when the third dimension of data is singleton or absent. 
[nBands,nRows,nCols] = size(data);
nPixels = nRows*nCols;

if ~exist('referenceSpectra','var')
	referenceSpectra = zeros(nBands,0);
end

%Order the two thresholds so that the window logic below can rely on every
%sample above the high threshold also being above the low threshold. 
lowThreshold = min(sigmaThresholds(1:2));
highThreshold = max(sigmaThresholds(1:2));

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Apply the initial smoothing. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%Convert from a 3D array to a 2D array (one spectrum per column). 
data2D = reshape(data,nBands,[]);

%Apply a median filter, generating a data set to use for the identification
%of the most highly correlated neighboring spectrum. 
medianData = despikeMedian(data2D,m); 

%Standardize every median-filtered spectrum (zero mean, unit norm) so that
%the Pearson correlation of two spectra is simply their dot product. A
%constant spectrum has zero norm and therefore becomes NaN. 
centered = double(medianData);
centered = bsxfun(@minus,centered,mean(centered,1));
standardized = bsxfun(@rdivide,centered,sqrt(sum(centered.^2,1)));

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Determine the self, nearest-neighbor pairs of indices. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%Row/column shifts of the eight neighbors, listed in order of increasing
%linear-index offset: -n-1, -n, -n+1, -1, +1, n-1, n, n+1. 
rowShift = [-1  0  1 -1  1 -1  0  1];
colShift = [-1 -1 -1  0  0  1  1  1];

%Subscripts of every pixel and of each of its eight candidate neighbors. 
[selfRow,selfCol] = ind2sub([nRows nCols],(1:nPixels)');
neighRow = bsxfun(@plus,selfRow,rowShift);
neighCol = bsxfun(@plus,selfCol,colShift);

%A candidate is relevant only if it lies inside the image. Working in
%subscripts means candidates can neither fall outside the index range nor
%wrap around an image edge. 
relevant = neighRow>=1 & neighRow<=nRows & neighCol>=1 & neighCol<=nCols;

%Linear (column-major) pixel indices of the candidates. Entries where
%relevant is false are meaningless and are never used for indexing. 
neighborIndices = neighRow + (neighCol-1)*nRows;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Determine which nearest neighbor has the highest cross correlation
%coefficient. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%The minimum possible value of a cross-correlation coefficient is -1.
%Therefore, if we initialize the matrix to -2, any entries which are not
%populated (because they extend beyond the edge of the image) will not be
%selected in preference to a populated, non-NaN entry. 
nearCorr = -2*ones(nPixels,8);

%Populate the relevant entries one neighbor direction at a time. 
for k=1:8
	selfIdx = find(relevant(:,k));
	neighIdx = neighborIndices(selfIdx,k);
	pairCorr = sum(standardized(:,selfIdx).*standardized(:,neighIdx),1);
	%Remove round-off excursions beyond [-1,1]. Logical indexing is used
	%rather than min/max so that NaN values are left untouched. 
	pairCorr(pairCorr>1) = 1;
	pairCorr(pairCorr<-1) = -1;
	nearCorr(selfIdx,k) = pairCorr(:);
end

%Determine the location and values of the most correlated nearest
%neighbors. 
[bestCorr,offsetLoc] = max(nearCorr,[],2);

%Determine the index of the selected neighbor for each pixel. 
pickedIdx = (1:nPixels)' + (offsetLoc-1)*nPixels;
neighborIndex = neighborIndices(pickedIdx);

%max ignores NaN values, so if every relevant correlation of a border pixel
%is NaN the selection above lands on a -2 placeholder, i.e. on a neighbor
%that is not part of the image. Fall back to the first relevant neighbor
%and report the correlation as undefined. A pixel with no neighbors at all
%(single-pixel image) is flagged with index 0 and left unprocessed below. 
invalidPick = ~relevant(pickedIdx);
for j=find(invalidPick)'
	firstValid = find(relevant(j,:),1);
	if isempty(firstValid)
		neighborIndex(j) = 0;
	else
		neighborIndex(j) = neighborIndices(j,firstValid);
	end
	bestCorr(j) = NaN;
end

%Construct a matrix indicating the values of the best correlation for each
%pixel. 
corrVals = reshape(bestCorr,nRows,nCols);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Loop through all spectra, identifying any contaminated pixels and
%replacing them with their counterpart from their most-correlated
%nearest-neighbor.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%Initialize as the existing spectra
deconData = data2D;

for j=1:nPixels
	%Nothing to compare against: leave this spectrum unchanged. 
	if neighborIndex(j)<1
		continue
	end
	
	%Extract the two spectra
	originalSpectrum = data2D(:,j);
	neighborSpectrum = data2D(:,neighborIndex(j));
	
	%Refine the neighboring spectrum using a robust linear regression.
	%robustfit adds the constant term itself and returns it as b(1). 
	modelSpectrum = [neighborSpectrum referenceSpectra];
	[b,stats] = robustfit(modelSpectrum,originalSpectrum,'bisquare');
	neighborMatch = modelSpectrum*b(2:end) + b(1);
	
	%The noise estimate for this algorithm is the standard deviation from
	%the linear regression. 
	noiseEst = stats.s; 
	
	%Determine residuals and which residuals are over each threshold. Only
	%positive excursions are flagged. 
	scaledResiduals = (originalSpectrum-neighborMatch)/noiseEst;
	aboveLower = scaledResiduals>lowThreshold;
	aboveUpper = scaledResiduals>highThreshold;
	
	%Label each contiguous run of samples above the lower threshold with a
	%positive integer; samples outside any run are labelled 0. 
	runId = cumsum(diff([0; aboveLower])==1);
	runId(~aboveLower) = 0;
	
	%We only care about those lower-threshold runs that contain at least
	%one sample above the upper threshold. 
	replace = aboveLower & ismember(runId,runId(aboveUpper));
	
	%Replace these with the relevant wavelengths from the approximation
	%spectrum from the neighbor. 
	deconData(replace,j) = neighborMatch(replace);
end

